| Model | HF Main Model Name | HF Draft Model Name (speculative decoding) | Size | Format | API | GPU | GPU Mem | Run | Duration | Total | % | Correct Random Guesses | Prompt tokens | tk/s | Completion tokens | tk/s |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| claude-3-5-sonnet-20241022 | - | - | - | - | Anthropic | - | - | 1/3 | 31m 50s | 340/410 | 82.93% | | 694458 | 362.78 | 97438 | 50.90 |
| claude-3-5-sonnet-20241022 | - | - | - | - | Anthropic | - | - | 2/3 | 31m 39s | 338/410 | 82.44% | | 694458 | 364.82 | 97314 | 51.12 |
| 🆕 claude-3-5-sonnet-20241022 | - | - | - | - | Anthropic | - | - | 3/3 | 28m 56s | 337/410 | 82.20% | | 867478 | 498.45 | 84785 | 48.72 |
| gemini-1.5-pro-002 | - | - | - | - | Gemini | - | - | 1/3 | 31m 7s | 335/410 | 81.71% | | 648675 | 346.82 | 78311 | 41.87 |
| 🆕 gemini-1.5-pro-002 | - | - | - | - | Gemini | - | - | 2/3 | 29m 52s | 333/410 | 81.22% | | 648675 | 361.38 | 77030 | 42.91 |
| gemini-1.5-pro-002 | - | - | - | - | Gemini | - | - | 3/3 | 30m 40s | 327/410 | 79.76% | | 648675 | 351.73 | 76063 | 41.24 |
| QwQ-32B-Preview (8.0bpw EXL2, max_tokens=16384) | bartowski/QwQ-32B-Preview-exl2_8_0 | Qwen/Qwen2.5-Coder-0.5B-Instruct | 32B | EXL2 | TabbyAPI | RTX 6000 | 38436MiB | 1/2 | 2h 3m 30s | 325/410 | 79.27% | 0/2, 0.00% | 656716 | 88.58 | 327825 | 44.22 |
| QwQ-32B-Preview (8.0bpw EXL2, max_tokens=16384) | bartowski/QwQ-32B-Preview-exl2_8_0 | Qwen/Qwen2.5-Coder-0.5B-Instruct | 32B | EXL2 | TabbyAPI | RTX 6000 | 38436MiB | 2/2 | 2h 3m 35s | 324/410 | 79.02% | | 656716 | 88.52 | 343440 | 46.29 |
| Athene-V2-Chat (72B, 4.65bpw EXL2, Q4 cache) | wolfram/Athene-V2-Chat-4.65bpw-h6-exl2 | - | 72B | EXL2 | TabbyAPI | RTX 6000 | 44496MiB | 1/3 | 2h 13m 5s | 326/410 | 79.51% | | 656716 | 82.21 | 142256 | 17.81 |
| Athene-V2-Chat (72B, 4.65bpw EXL2, Q4 cache) | wolfram/Athene-V2-Chat-4.65bpw-h6-exl2 | - | 72B | EXL2 | TabbyAPI | RTX 6000 | 44496MiB | 2/3 | 2h 14m 53s | 317/410 | 77.32% | | 656716 | 81.11 | 143659 | 17.74 |
| 🆕 Athene-V2-Chat (72B, 4.65bpw EXL2, Q4 cache) | wolfram/Athene-V2-Chat-4.65bpw-h6-exl2 | - | 72B | EXL2 | TabbyAPI | RTX 6000 | 44496MiB | 3/3 | 1h 49m 40s | 312/410 | 76.10% | | 805136 | 122.30 | 115284 | 17.51 |
| Qwen2.5-72B-Instruct (4.65bpw EXL2, Q4 cache) | LoneStriker/Qwen2.5-72B-Instruct-4.65bpw-h6-exl2 | - | 72B | EXL2 | TabbyAPI | 2x RTX 3090 | 41150MiB | 1/2 | 3h 7m 58s | 320/410 | 78.05% | | 656716 | 58.21 | 139499 | 12.36 |
| Qwen2.5-72B-Instruct (4.65bpw EXL2, Q4 cache) | LoneStriker/Qwen2.5-72B-Instruct-4.65bpw-h6-exl2 | - | 72B | EXL2 | TabbyAPI | 2x RTX 3090 | 41150MiB | 2/2 | 3h 5m 19s | 319/410 | 77.80% | | 656716 | 59.04 | 138135 | 12.42 |
| DeepSeek-V3 | deepseek-ai/DeepSeek-V3 | - | 671B | HF | DeepSeek | - | - | 1/4 | 20m 22s | 320/410 | 78.05% | | 628029 | 512.38 | 66807 | 54.50 |
| DeepSeek-V3 | deepseek-ai/DeepSeek-V3 | - | 671B | HF | DeepSeek | - | - | 2/4 | 27m 43s | 320/410 | 78.05% | | 628029 | 376.59 | 66874 | 40.10 |
| DeepSeek-V3 | deepseek-ai/DeepSeek-V3 | - | 671B | HF | DeepSeek | - | - | 3/4 | 19m 45s | 319/410 | 77.80% | | 628029 | 528.39 | 64470 | 54.24 |
| DeepSeek-V3 | deepseek-ai/DeepSeek-V3 | - | 671B | HF | DeepSeek | - | - | 4/4 | 19m 45s | 319/410 | 77.80% | | 628029 | 375.73 | 69531 | 41.60 |
| gpt-4o-2024-08-06 | - | - | - | - | OpenAI | - | - | 1/2 | 34m 54s | 320/410 | 78.05% | 1/2, 50.00% | 631448 | 300.79 | 99103 | 47.21 |
| gpt-4o-2024-08-06 | - | - | - | - | OpenAI | - | - | 2/2 | 42m 41s | 316/410 | 77.07% | 1/3, 33.33% | 631448 | 246.02 | 98466 | 38.36 |
| mistral-large-2407 (123B) | mistralai/Mistral-Large-Instruct-2407 | - | 123B | HF | Mistral | - | - | 1/2 | 40m 23s | 310/410 | 75.61% | | 696798 | 287.13 | 79444 | 32.74 |
| mistral-large-2407 (123B) | mistralai/Mistral-Large-Instruct-2407 | - | 123B | HF | Mistral | - | - | 2/2 | 46m 55s | 308/410 | 75.12% | 0/1, 0.00% | 696798 | 247.21 | 75971 | 26.95 |
| Llama-3.1-405B-Instruct-FP8 | meta-llama/Llama-3.1-405B-Instruct-FP8 | - | 405B | HF | IONOS | - | - | 1/2 | 2h 5m 28s | 311/410 | 75.85% | | 648580 | 86.11 | 79191 | 10.51 |
| Llama-3.1-405B-Instruct-FP8 | meta-llama/Llama-3.1-405B-Instruct-FP8 | - | 405B | HF | IONOS | - | - | 2/2 | 2h 10m 19s | 307/410 | 74.88% | | 648580 | 82.90 | 79648 | 10.18 |
| mistral-large-2411 (123B) | mistralai/Mistral-Large-Instruct-2411 | - | 123B | HF | Mistral | - | - | 1/2 | 41m 46s | 302/410 | 73.66% | 1/3, 33.33% | 696798 | 277.70 | 82028 | 32.69 |
| mistral-large-2411 (123B) | mistralai/Mistral-Large-Instruct-2411 | - | 123B | HF | Mistral | - | - | 2/2 | 32m 47s | 300/410 | 73.17% | 0/1, 0.00% | 696798 | 353.53 | 77998 | 39.57 |
| chatgpt-4o-latest @ 2024-11-18 | - | - | - | - | OpenAI | - | - | 1/2 | 28m 17s | 302/410 | 73.66% | 2/4, 50.00% | 631448 | 371.33 | 146558 | 86.18 |
| chatgpt-4o-latest @ 2024-11-18 | - | - | - | - | OpenAI | - | - | 2/2 | 28m 31s | 298/410 | 72.68% | 2/2, 100.00% | 631448 | 368.19 | 146782 | 85.59 |
| gpt-4o-2024-11-20 | - | - | - | - | OpenAI | - | - | 1/2 | 25m 35s | 296/410 | 72.20% | 1/7, 14.29% | 631448 | 410.38 | 158694 | 103.14 |
| gpt-4o-2024-11-20 | - | - | - | - | OpenAI | - | - | 2/2 | 26m 10s | 294/410 | 71.71% | 1/7, 14.29% | 631448 | 400.95 | 160378 | 101.84 |
| Llama-3.3-70B-Instruct (4.0bpw EXL2) | LoneStriker/Llama-3.3-70B-Instruct-4.0bpw-h6-exl2 | - | 70B | EXL2 | TabbyAPI | RTX 6000 | 47148MiB | 1/2 | 2h 2m 33s | 293/410 | 71.46% | | 648580 | 88.15 | 87107 | 11.84 |
| Llama-3.3-70B-Instruct (4.0bpw EXL2) | LoneStriker/Llama-3.3-70B-Instruct-4.0bpw-h6-exl2 | - | 70B | EXL2 | TabbyAPI | RTX 6000 | 47148MiB | 2/2 | 1h 33m 59s | 293/410 | 71.46% | | 534360 | 94.70 | 89510 | 15.86 |
| Llama-3.1-70B-Instruct | meta-llama/Llama-3.1-70B-Instruct | - | 70B | HF | IONOS | - | - | 1/2 | 41m 12s | 291/410 | 70.98% | 3/12, 25.00% | 648580 | 261.88 | 102559 | 41.41 |
| Llama-3.1-70B-Instruct | meta-llama/Llama-3.1-70B-Instruct | - | 70B | HF | IONOS | - | - | 2/2 | 39m 48s | 287/410 | 70.00% | 3/14, 21.43% | 648580 | 271.12 | 106644 | 44.58 |
| Llama-3.1-Nemotron-70B-Instruct (4.25bpw EXL2) | bartowski/Llama-3.1-Nemotron-70B-Instruct-HF-exl2_4_25 | - | 70B | EXL2 | TabbyAPI | RTX 6000 | 40104MiB | 1/2 | 2h 13m 3s | 290/410 | 70.73% | | 640380 | 80.18 | 157235 | 19.69 |
| Llama-3.1-Nemotron-70B-Instruct (4.25bpw EXL2) | bartowski/Llama-3.1-Nemotron-70B-Instruct-HF-exl2_4_25 | - | 70B | EXL2 | TabbyAPI | RTX 6000 | 40104MiB | 2/2 | 2h 13m 15s | 287/410 | 70.00% | 0/1, 0.00% | 640380 | 80.07 | 157471 | 19.69 |
| QVQ-72B-Preview (4.65bpw EXL2, max_tokens=16384) | wolfram/QVQ-72B-Preview-4.65bpw-h6-exl2 | Qwen/Qwen2.5-Coder-0.5B-Instruct | 72B | EXL2 | TabbyAPI | RTX 6000 | 46260MiB | 1/2 | 3h 43m 12s | 290/410 | 70.73% | 1/3, 33.33% | 656716 | 49.02 | 441187 | 32.93 |
| QVQ-72B-Preview (4.65bpw EXL2, max_tokens=16384) | wolfram/QVQ-72B-Preview-4.65bpw-h6-exl2 | Qwen/Qwen2.5-Coder-0.5B-Instruct | 72B | EXL2 | TabbyAPI | RTX 6000 | 46260MiB | 2/2 | 3h 47m 29s | 284/410 | 69.27% | 0/2, 0.00% | 656716 | 48.10 | 450363 | 32.99 |
| gemini-1.5-flash-002 | - | - | - | - | Gemini | - | - | 1/2 | 13m 19s | 288/410 | 70.24% | 1/6, 16.67% | 648675 | 808.52 | 80535 | 100.38 |
| gemini-1.5-flash-002 | - | - | - | - | Gemini | - | - | 2/2 | 22m 30s | 285/410 | 69.51% | 2/7, 28.57% | 648675 | 479.42 | 80221 | 59.29 |
| Llama-3.2-90B-Vision-Instruct | meta-llama/Llama-3.2-90B-Vision-Instruct | - | 90B | HF | Azure | - | - | 1/2 | 33m 6s | 289/410 | 70.49% | 4/7, 57.14% | 640380 | 321.96 | 88997 | 44.74 |
| Llama-3.2-90B-Vision-Instruct | meta-llama/Llama-3.2-90B-Vision-Instruct | - | 90B | HF | Azure | - | - | 2/2 | 31m 31s | 281/410 | 68.54% | 2/5, 40.00% | 640380 | 338.10 | 85381 | 45.08 |
| Qwen2.5-Coder-32B-Instruct (8.0bpw EXL2) | bartowski/Qwen2.5-Coder-32B-Instruct-exl2_8_0 | Qwen/Qwen2.5-Coder-3B-Instruct | 32B | EXL2 | TabbyAPI | RTX 6000 | 45880MiB | 1/7 | 41m 59s | 289/410 | 70.49% | | 656716 | 260.29 | 92126 | 36.51 |
| Qwen2.5-Coder-32B-Instruct (8.0bpw EXL2) | bartowski/Qwen2.5-Coder-32B-Instruct-exl2_8_0 | Qwen/Qwen2.5-Coder-0.5B-Instruct | 32B | EXL2 | TabbyAPI | RTX 6000 | 40036MiB | 2/7 | 34m 24s | 286/410 | 69.76% | | 656716 | 317.48 | 89487 | 43.26 |
| Qwen2.5-Coder-32B-Instruct (8.0bpw EXL2) | bartowski/Qwen2.5-Coder-32B-Instruct-exl2_8_0 | Qwen/Qwen2.5-Coder-3B-Instruct | 32B | EXL2 | TabbyAPI | RTX 6000 | 45880MiB | 3/7 | 41m 27s | 283/410 | 69.02% | 0/1, 0.00% | 656716 | 263.62 | 90349 | 36.27 |
| Qwen2.5-Coder-32B-Instruct (8.0bpw EXL2) | bartowski/Qwen2.5-Coder-32B-Instruct-exl2_8_0 | bartowski/Qwen2.5-Coder-7B-Instruct-exl2_8_0 | 32B | EXL2 | TabbyAPI | RTX 6000 | 43688MiB | 4/7 | 42m 32s | 283/410 | 69.02% | 0/1, 0.00% | 656716 | 256.77 | 90899 | 35.54 |
| Qwen2.5-Coder-32B-Instruct (8.0bpw EXL2) | bartowski/Qwen2.5-Coder-32B-Instruct-exl2_8_0 | bartowski/Qwen2.5-Coder-7B-Instruct-exl2_8_0 | 32B | EXL2 | TabbyAPI | RTX 6000 | 43688MiB | 5/7 | 44m 34s | 282/410 | 68.78% | 0/1, 0.00% | 656716 | 245.24 | 96470 | 36.03 |
| Qwen2.5-Coder-32B-Instruct (8.0bpw EXL2) | bartowski/Qwen2.5-Coder-32B-Instruct-exl2_8_0 | - | 32B | EXL2 | TabbyAPI | RTX 6000 | 38620MiB | 6/7 | 1h 2m 8s | 282/410 | 68.78% | | 656716 | 175.98 | 92767 | 24.86 |
| Qwen2.5-Coder-32B-Instruct (8.0bpw EXL2) | bartowski/Qwen2.5-Coder-32B-Instruct-exl2_8_0 | Qwen/Qwen2.5-Coder-0.5B-Instruct | 32B | EXL2 | TabbyAPI | RTX 6000 | 40036MiB | 7/7 | 34m 56s | 280/410 | 68.29% | | 656716 | 312.66 | 91926 | 43.76 |
| Mistral-Large-Instruct-2411 (123B, 3.0bpw EXL2) | MikeRoz/mistralai_Mistral-Large-Instruct-2411-3.0bpw-h6-exl2 | - | 123B | EXL2 | TabbyAPI | RTX 6000 | 47068MiB | 1/2 | 1h 26m 26s | 284/410 | 69.27% | 1/3, 33.33% | 696798 | 134.23 | 79925 | 15.40 |
| Mistral-Large-Instruct-2411 (123B, 3.0bpw EXL2) | MikeRoz/mistralai_Mistral-Large-Instruct-2411-3.0bpw-h6-exl2 | - | 123B | EXL2 | TabbyAPI | RTX 6000 | 47068MiB | 2/2 | 1h 26m 10s | 275/410 | 67.07% | 0/2, 0.00% | 696798 | 134.67 | 79778 | 15.42 |
| 🆕 Phi-4 (14B, Unsloth, GGUF) | unsloth/phi-4-GGUF | - | 14B | GGUF | llama.cpp | RTX 6000 | 31978MiB | 1/2 | 1h 19m 51s | 278/410 | 67.80% | 1/6, 16.67% | 639591 | 133.40 | 133610 | 27.87 |
| 🆕 Phi-4 (14B, Unsloth, GGUF) | unsloth/phi-4-GGUF | - | 14B | GGUF | llama.cpp | RTX 6000 | 31978MiB | 2/2 | 1h 19m 41s | 278/410 | 67.80% | 1/6, 16.67% | 639591 | 133.67 | 133610 | 27.92 |
| 🆕 Phi-4 (14B, Unsloth, HF) | unsloth/phi-4 | - | 14B | HF | TabbyAPI | RTX 6000 | | 1/2 | 1h 38m 29s | 274/410 | 66.83% | 1/3, 33.33% | 635081 | 107.42 | 113731 | 19.24 |
| 🆕 Phi-4 (14B, Unsloth, HF) | unsloth/phi-4 | - | 14B | HF | TabbyAPI | RTX 6000 | | 2/2 | 1h 39m 32s | 273/410 | 66.59% | 1/3, 33.33% | 635081 | 106.29 | 113712 | 19.03 |
| 🆕 Phi-4 (14B, Microsoft, HF) | microsoft/phi-4 | - | 14B | HF | TabbyAPI | RTX 6000 | 31394MiB | 1/2 | 1h 7m 44s | 272/410 | 66.34% | 1/3, 33.33% | 635081 | 156.15 | 113358 | 27.87 |
| 🆕 Phi-4 (14B, Microsoft, HF) | microsoft/phi-4 | - | 14B | HF | TabbyAPI | RTX 6000 | 31394MiB | 2/2 | 1h 7m 44s | 271/410 | 66.10% | 1/3, 33.33% | 635081 | 156.10 | 113384 | 27.87 |
| Mistral-Large-Instruct-2407 (123B, 2.75bpw EXL2) | turboderp/Mistral-Large-Instruct-2407-123B-exl2_2.75bpw | - | 123B | EXL2 | TabbyAPI | RTX 6000 | 45096MiB | 1/2 | 1h 8m 8s | 271/410 | 66.10% | | 696798 | 170.29 | 66670 | 16.29 |
| Mistral-Large-Instruct-2407 (123B, 2.75bpw EXL2) | turboderp/Mistral-Large-Instruct-2407-123B-exl2_2.75bpw | - | 123B | EXL2 | TabbyAPI | RTX 6000 | 45096MiB | 2/2 | 1h 10m 38s | 268/410 | 65.37% | 1/3, 33.33% | 696798 | 164.23 | 69182 | 16.31 |
| 🆕 Qwen2-VL-72B-Instruct (4.5bpw EXL2) | turboderp/Qwen2-VL-72B-Instruct-exl2_4.5bpw | - | 72B | EXL2 | TabbyAPI | RTX 6000 | 43554MiB | 1/2 | 1h 10m 51s | 255/410 | 62.20% | 0/3, 0.00% | 656716 | 154.36 | 71752 | 16.87 |
| 🆕 Qwen2-VL-72B-Instruct (4.5bpw EXL2) | turboderp/Qwen2-VL-72B-Instruct-exl2_4.5bpw | - | 72B | EXL2 | TabbyAPI | RTX 6000 | 43554MiB | 2/2 | 1h 26m 40s | 255/410 | 62.20% | 1/6, 16.67% | 656716 | 126.20 | 88249 | 16.96 |
| Mistral-Large-Instruct-2411 (123B, 2.75bpw EXL2) | wolfram/Mistral-Large-Instruct-2411-2.75bpw-h6-exl2 | - | 123B | EXL2 | TabbyAPI | RTX 6000 | 45096MiB | 1/2 | 1h 11m 50s | 267/410 | 65.12% | 1/4, 25.00% | 696798 | 161.53 | 70538 | 16.35 |
| Mistral-Large-Instruct-2411 (123B, 2.75bpw EXL2) | wolfram/Mistral-Large-Instruct-2411-2.75bpw-h6-exl2 | - | 123B | EXL2 | TabbyAPI | RTX 6000 | 45096MiB | 2/2 | 1h 13m 50s | 243/410 | 59.27% | 0/4, 0.00% | 696798 | 157.18 | 72718 | 16.40 |
| Falcon3-10B-Instruct | tiiuae/Falcon3-10B-Instruct | - | 10B | HF | Ollama | RTX 6000 | 20906MiB | 1/2 | 35m 15s | 251/410 | 61.22% | 2/5, 40.00% | 702578 | 331.57 | 75501 | 35.63 |
| Falcon3-10B-Instruct | tiiuae/Falcon3-10B-Instruct | - | 10B | HF | Ollama | RTX 6000 | 20906MiB | 2/2 | 35m 21s | 251/410 | 61.22% | 2/5, 40.00% | 702578 | 330.66 | 75501 | 35.53 |
| mistral-small-2409 (22B) | mistralai/Mistral-Small-Instruct-2409 | - | 22B | HF | Mistral | - | - | 1/2 | 25m 3s | 243/410 | 59.27% | 1/4, 25.00% | 696798 | 462.38 | 73212 | 48.58 |
| mistral-small-2409 (22B) | mistralai/Mistral-Small-Instruct-2409 | - | 22B | HF | Mistral | - | - | 2/2 | 20m 45s | 239/410 | 58.29% | 1/4, 25.00% | 696798 | 558.10 | 76017 | 60.89 |
| 🆕 Aya-Expanse-32B (8.0bpw EXL2) | lucyknada/CohereForAI_aya-expanse-32b-exl2_8.0bpw | - | 32B | EXL2 | TabbyAPI | RTX 6000 | 33686MiB | 1/2 | 43m 18s | 212/410 | 51.71% | 0/1, 0.00% | 661930 | 254.04 | 60728 | 23.31 |
| 🆕 Aya-Expanse-32B (8.0bpw EXL2) | lucyknada/CohereForAI_aya-expanse-32b-exl2_8.0bpw | - | 32B | EXL2 | TabbyAPI | RTX 6000 | 33686MiB | 2/2 | 42m 27s | 211/410 | 51.46% | 0/4, 0.00% | 661930 | 259.50 | 59557 | 23.35 |
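The two tk/s columns can be roughly reproduced from the token counts and the Duration column (tokens divided by wall-clock seconds; small deviations from the reported values are expected, since the benchmark presumably measures per-request rather than whole-run throughput). A minimal sketch of that check, assuming duration strings follow the `31m 50s` / `2h 3m 30s` pattern used in the table:

```python
def parse_duration(s: str) -> int:
    """Convert a table duration like '31m 50s' or '2h 3m 30s' to seconds."""
    units = {"h": 3600, "m": 60, "s": 1}
    return sum(int(part[:-1]) * units[part[-1]] for part in s.split())

# First claude-3-5-sonnet-20241022 run: 694458 prompt tokens in 31m 50s.
seconds = parse_duration("31m 50s")   # 1910 seconds
prompt_tps = 694458 / seconds         # ~363.6 vs. the reported 362.78
completion_tps = 97438 / seconds      # ~51.0 vs. the reported 50.90
```
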